import lxml.etree as le

# For meiju1.html the thread titles can be found with XPath.
# with open('meiju1.html','r',encoding='utf-8') as f:
#     html = f.read()
#     html_x = le.HTML(html)
#     # print(html_x)
#     title_s = html_x.xpath('//div[contains(@class,"threadlist_title pull_left j_th_tit")]/a/text()')
#     for title in title_s:
#         print(title)
#

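# In meiju2.html the relevant markup is commented out; XPath can only read
# markup that is not commented out, so a regular expression has to be used.
# The XPath query works in the browser page, but it does not work from Python.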
import re
# Load the saved page and drop every newline: `.` in the pattern below does
# not match line breaks by default, so the markup is flattened to one line.
with open('meiju2.html', mode='r', encoding='utf-8') as f:
    html = f.read().replace('\n', '')

# For each thread-title <div>, capture whatever sits between the following
# `<a ...>` opening tag and the nearest `</a>`.
title_pattern = '<div class="threadlist_title pull_left j_th_tit ">.*?<a.*?>(.*?)</a>'
title_s = re.compile(title_pattern).findall(html)

# Emit one captured title per line.
for title in title_s:
    print(title)

